{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 抓取全过程代码\n",
    "\n",
    "相关的url：https://www.qidian.com/finish\n",
    "url相关详细链接：https://www.qidian.com/finish?action=hidden&page=1&style=1&pageSize=20&siteid=1&pubflag=0&hiddenField=2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>大分类</th>\n",
       "      <th>小分类</th>\n",
       "      <th>简介</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>诡秘之主</td>\n",
       "      <td>爱潜水的乌贼轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>蒸汽与机械的浪潮中，谁能触及非凡？历史和黑暗的迷雾里，又是谁在耳语？我从诡秘中醒来，睁眼看见...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>学霸的黑科技系统</td>\n",
       "      <td>晨星LL轻小说</td>\n",
       "      <td>科幻</td>\n",
       "      <td>超级科技</td>\n",
       "      <td>“系统，积分能兑钱吗？”“不能。”“那我要你何用！”“本系统能让你当上学霸，你还要钱干啥？”...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>大王饶命</td>\n",
       "      <td>会说话的肘子轻小说</td>\n",
       "      <td>都市</td>\n",
       "      <td>都市生活</td>\n",
       "      <td>高中生吕树在一场车祸中改变人生，当灵气复苏时代来袭，他要做这时代的领跑者。物竞天择，胜者为王...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>出名太快怎么办</td>\n",
       "      <td>十步杀一仙</td>\n",
       "      <td>都市</td>\n",
       "      <td>娱乐明星</td>\n",
       "      <td>由于歌手意外缺席，迫于压力，王桓站出来唱了一首歌……没想到就这样出名了。（书友群：48042...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>牧神记</td>\n",
       "      <td>宅猪</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>大墟的祖训说，天黑，别出门。大墟残老村的老弱病残们从江边捡到了一个婴儿，取名秦牧，含辛茹苦将...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>15</td>\n",
       "      <td>狂探</td>\n",
       "      <td>旷海忘湖</td>\n",
       "      <td>都市</td>\n",
       "      <td>都市异能</td>\n",
       "      <td>一个无节操的小痞子，意外穿越到平行空间，摇身变成了一名重案组探员。一个邪门的奇遇系统，却让他...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>16</td>\n",
       "      <td>海贼之海军雷神</td>\n",
       "      <td>大树L</td>\n",
       "      <td>轻小说</td>\n",
       "      <td>衍生同人</td>\n",
       "      <td>新书《海贼世界的死灵法师》已上传，大家有空来看看啊！沙雕大树又忘了完本不能发新章节通知了！…...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>17</td>\n",
       "      <td>我被系统托管了</td>\n",
       "      <td>木恒轻小说</td>\n",
       "      <td>科幻</td>\n",
       "      <td>进化变异</td>\n",
       "      <td>系统有了，身体却没了，重度拖延症晚期患者方宁开始了自我救赎（苟且偷生）之路。作品讨论群号：2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>18</td>\n",
       "      <td>莽荒纪</td>\n",
       "      <td>我吃西红柿</td>\n",
       "      <td>仙侠</td>\n",
       "      <td>修真文明</td>\n",
       "      <td>纪宁死后来到阴曹地府，经判官审前生判来世，投胎到了部族纪氏。这里，有夸父逐日……有后羿射金乌...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>19</td>\n",
       "      <td>不朽凡人</td>\n",
       "      <td>鹅是老五</td>\n",
       "      <td>仙侠</td>\n",
       "      <td>幻想修仙</td>\n",
       "      <td>在这里，拥有灵根才能修仙，所有凡根注定只是凡人。莫无忌，只有凡根，一介凡人！</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    index        标题         作者  大分类   小分类  \\\n",
       "0       0      诡秘之主  爱潜水的乌贼轻小说   玄幻  异世大陆   \n",
       "1       1  学霸的黑科技系统    晨星LL轻小说   科幻  超级科技   \n",
       "2       2      大王饶命  会说话的肘子轻小说   都市  都市生活   \n",
       "3       3   出名太快怎么办      十步杀一仙   都市  娱乐明星   \n",
       "4       4       牧神记         宅猪   玄幻  东方玄幻   \n",
       "..    ...       ...        ...  ...   ...   \n",
       "95     15        狂探       旷海忘湖   都市  都市异能   \n",
       "96     16   海贼之海军雷神        大树L  轻小说  衍生同人   \n",
       "97     17   我被系统托管了      木恒轻小说   科幻  进化变异   \n",
       "98     18       莽荒纪      我吃西红柿   仙侠  修真文明   \n",
       "99     19      不朽凡人       鹅是老五   仙侠  幻想修仙   \n",
       "\n",
       "                                                   简介  \n",
       "0   蒸汽与机械的浪潮中，谁能触及非凡？历史和黑暗的迷雾里，又是谁在耳语？我从诡秘中醒来，睁眼看见...  \n",
       "1   “系统，积分能兑钱吗？”“不能。”“那我要你何用！”“本系统能让你当上学霸，你还要钱干啥？”...  \n",
       "2   高中生吕树在一场车祸中改变人生，当灵气复苏时代来袭，他要做这时代的领跑者。物竞天择，胜者为王...  \n",
       "3   由于歌手意外缺席，迫于压力，王桓站出来唱了一首歌……没想到就这样出名了。（书友群：48042...  \n",
       "4   大墟的祖训说，天黑，别出门。大墟残老村的老弱病残们从江边捡到了一个婴儿，取名秦牧，含辛茹苦将...  \n",
       "..                                                ...  \n",
       "95  一个无节操的小痞子，意外穿越到平行空间，摇身变成了一名重案组探员。一个邪门的奇遇系统，却让他...  \n",
       "96  新书《海贼世界的死灵法师》已上传，大家有空来看看啊！沙雕大树又忘了完本不能发新章节通知了！…...  \n",
       "97  系统有了，身体却没了，重度拖延症晚期患者方宁开始了自我救赎（苟且偷生）之路。作品讨论群号：2...  \n",
       "98  纪宁死后来到阴曹地府，经判官审前生判来世，投胎到了部族纪氏。这里，有夸父逐日……有后羿射金乌...  \n",
       "99             在这里，拥有灵根才能修仙，所有凡根注定只是凡人。莫无忌，只有凡根，一介凡人！  \n",
       "\n",
       "[100 rows x 6 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "session = HTMLSession()\n",
    "def get_info(url):\n",
    "    r = session.get( url )\n",
    "    \n",
    "    # 提取主要元素\n",
    "    infos = r.html.xpath('//ul[@class=\"all-img-list cf\"]/li')\n",
    "    # 利用浏览器检查，利用xpath准确抓取所需信息\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            '标题':      '//div[@class=\"book-mid-info\"]/h4/a',\n",
    "            '作者':      '//div[@class=\"book-mid-info\"]/p/a[@class=\"name\"]',\n",
    "            '大分类':    '//div[@class=\"book-mid-info\"]/p/a[@data-eid=\"qd_B60\"]', \n",
    "            '小分类':    '//div[@class=\"book-mid-info\"]/p/a[@class=\"go-sub-type\"]', \n",
    "            '简介':    '//div[@class=\"book-mid-info\"]/p[@class=\"intro\"]', \n",
    "        }\n",
    "     }\n",
    "    # 在主要元素下将要抓取的内容用xpath抓取\n",
    "    \n",
    "    def get_e_text(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [\"\".join([x.strip() if type(x) is str else x.text.strip() for x in e.xpath(_xpath_)]) for e in infos]\n",
    "        return(暂存结果)\n",
    "    \n",
    "    # 将抓取到的数据用字典形式保存起来   \n",
    "    数据字典 = dict()\n",
    "\n",
    "    数据字典 = {k:get_e_text(v) for k,v in dict_xpaths['text'].items()}\n",
    "        \n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    return (数据)\n",
    "\n",
    "    \n",
    "    \n",
    " \n",
    "# 找寻到要翻页的url链接规律\n",
    "urls = ['https://www.qidian.com/finish?page={}'.format(str(i)) for i in range(1,6)]\n",
    "list_df = list()\n",
    "for url in urls: # 用for循环实施翻页\n",
    "    df=get_info(url)\n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df).reset_index()\n",
    "df_all.to_excel(\"期末项目——起点.xlsx\")# 输出为excel表\n",
    "df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>大分类</th>\n",
       "      <th>小分类</th>\n",
       "      <th>简介</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>诡秘之主</td>\n",
       "      <td>爱潜水的乌贼轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>蒸汽与机械的浪潮中，谁能触及非凡？历史和黑暗的迷雾里，又是谁在耳语？我从诡秘中醒来，睁眼看见...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>牧神记</td>\n",
       "      <td>宅猪</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>大墟的祖训说，天黑，别出门。大墟残老村的老弱病残们从江边捡到了一个婴儿，取名秦牧，含辛茹苦将...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>斗罗大陆</td>\n",
       "      <td>唐家三少</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>唐门外门弟子唐三，因偷学内门绝学为唐门所不容，跳崖明志时却发现没有死，反而以另外一个身份来到...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>完美世界</td>\n",
       "      <td>辰东</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>一粒尘可填海，一根草斩尽日月星辰，弹指间天翻地覆。群雄并起，万族林立，诸圣争霸，乱天动地。问...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>天道图书馆</td>\n",
       "      <td>横扫天涯</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>【2017最火玄幻作品，海外点推双榜第一】张悬穿越异界，成了一名光荣的教师，脑海中多出了一个...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>一世之尊</td>\n",
       "      <td>爱潜水的乌贼</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>我这一生，不问前尘，不求来世，只轰轰烈烈，快意恩仇，败尽各族英杰，傲笑六道神魔！万年之后，大...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>史上第一密探</td>\n",
       "      <td>沉默的糕点</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>(已完本）x疯人院爆炸，院长云中鹤穿越，29个天才精神病人进入大脑，使他拥有29个诡异天赋！...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>史上最强赘婿</td>\n",
       "      <td>沉默的糕点</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>(已完本)穿越异世成为财主家的小白脸赘婿，因太废物被赶出来。于是他发奋图强，找一个更有权有势...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>进化之眼</td>\n",
       "      <td>亚舍罗</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>高武世界</td>\n",
       "      <td>出门一把刀，装备全靠爆！网文写手白小文，被一颗神奇眼球带到了百年之后的灵能时代。灵子渗透，变...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>奥术神座</td>\n",
       "      <td>爱潜水的乌贼轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>“知识就等于力量。”“所谓神，不过是强大一点的奥术师。”带着一大堆知识的夏风穿越而来了。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>武动乾坤</td>\n",
       "      <td>天蚕土豆</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>修炼一途，乃窃阴阳，夺造化，转涅盘，握生死，掌轮回。武之极，破苍穹，动乾坤！</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>女大三千位列仙班</td>\n",
       "      <td>打死不鸽轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>陆涯沉迷于修仙网游《上古仙庭》中，五年里夜夜爆肝修仙，就在他获得史上第一个【全职业满级称号】...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>大主宰</td>\n",
       "      <td>天蚕土豆</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>大千世界，位面交汇，万族林立，群雄荟萃，一位位来自下位面的天之至尊，在这无尽世界，演绎着令人...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>将夜</td>\n",
       "      <td>猫腻</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>与天斗，其乐无穷。……</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>武道宗师</td>\n",
       "      <td>爱潜水的乌贼</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>在这里，武道不再是虚无缥缈的传说，而是切切实实的传承，经过与科技的对抗后，彻底融入了社会，有...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>明日之劫</td>\n",
       "      <td>熊狼狗轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>流星划破长空，同样划破了世界的平静。超凡的力量降临人间，是谁在黑暗中窥伺，又是谁在我耳边低语...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>长生种</td>\n",
       "      <td>月中阴</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>高武世界</td>\n",
       "      <td>“消耗一年七个月寿命，可提升童子功。”“消耗一年七个月寿命，可提升金钟罩。”“消耗一年七个月...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>人道至尊</td>\n",
       "      <td>宅猪</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>自盘古开天，三皇治世，五帝定伦，世界之间，人为天地灵长……此时正值三皇中的人皇末期，五帝未定...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>随身背着复活棺</td>\n",
       "      <td>吾乃白</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>手握烈日掌星辰，世间唯我背棺人。我为背棺人，我为自己背棺。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>随身带个狩猎空间</td>\n",
       "      <td>青空洗雨</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>陆泽穿越到了两千年后的星际时代。前身自带常年秀恩爱虐狗的父母和可爱的妹妹，虽然修炼天赋普普通...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>诸天穿越者聊天群</td>\n",
       "      <td>业界良心</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>这是一个属于穿越者的聊天群，汇聚了来自诸天万界的穿越者。从小小的群成员做起，一步步成长到群主大佬！</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>一人之力</td>\n",
       "      <td>风消逝</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>异空间降临，神祇在面前显化，远古咆哮在耳边回荡不息。叮咚！一个人吃火锅，看电影，一人值加一。...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>我家可能有位大佬</td>\n",
       "      <td>雨下的好大</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>江左有个貌美的老婆，可是她却经常出差。去干嘛？不是忙着给他带帽子，而是瞒着他降妖除魔去了。得...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>我不想当妖皇的日子</td>\n",
       "      <td>剪水II</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>【无敌文】叮！您获得了天赋【神悟】，可领悟天地，从天地里获得力量。您发现自己每天什么都不用做...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>外挂傍身的杂草</td>\n",
       "      <td>低调青年</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>来到异界，成为大佬门前一株杂草，没事就被踩一脚。系统激活，外挂傍身。要告诉这大佬，做人要有素...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           标题         作者 大分类   小分类  \\\n",
       "0        诡秘之主  爱潜水的乌贼轻小说  玄幻  异世大陆   \n",
       "4         牧神记         宅猪  玄幻  东方玄幻   \n",
       "8        斗罗大陆       唐家三少  玄幻  异世大陆   \n",
       "18       完美世界         辰东  玄幻  东方玄幻   \n",
       "1       天道图书馆       横扫天涯  玄幻  异世大陆   \n",
       "3        一世之尊     爱潜水的乌贼  玄幻  东方玄幻   \n",
       "8      史上第一密探      沉默的糕点  玄幻  东方玄幻   \n",
       "9      史上最强赘婿      沉默的糕点  玄幻  东方玄幻   \n",
       "12       进化之眼        亚舍罗  玄幻  高武世界   \n",
       "17       奥术神座  爱潜水的乌贼轻小说  玄幻  异世大陆   \n",
       "18       武动乾坤       天蚕土豆  玄幻  东方玄幻   \n",
       "7    女大三千位列仙班    打死不鸽轻小说  玄幻  东方玄幻   \n",
       "11        大主宰       天蚕土豆  玄幻  异世大陆   \n",
       "17         将夜         猫腻  玄幻  东方玄幻   \n",
       "5        武道宗师     爱潜水的乌贼  玄幻  异世大陆   \n",
       "8        明日之劫     熊狼狗轻小说  玄幻  东方玄幻   \n",
       "11        长生种        月中阴  玄幻  高武世界   \n",
       "13       人道至尊         宅猪  玄幻  东方玄幻   \n",
       "14    随身背着复活棺        吾乃白  玄幻  异世大陆   \n",
       "16   随身带个狩猎空间       青空洗雨  玄幻  东方玄幻   \n",
       "18   诸天穿越者聊天群       业界良心  玄幻  东方玄幻   \n",
       "0        一人之力        风消逝  玄幻  异世大陆   \n",
       "1    我家可能有位大佬      雨下的好大  玄幻  东方玄幻   \n",
       "6   我不想当妖皇的日子       剪水II  玄幻  东方玄幻   \n",
       "11    外挂傍身的杂草       低调青年  玄幻  异世大陆   \n",
       "\n",
       "                                                   简介  \n",
       "0   蒸汽与机械的浪潮中，谁能触及非凡？历史和黑暗的迷雾里，又是谁在耳语？我从诡秘中醒来，睁眼看见...  \n",
       "4   大墟的祖训说，天黑，别出门。大墟残老村的老弱病残们从江边捡到了一个婴儿，取名秦牧，含辛茹苦将...  \n",
       "8   唐门外门弟子唐三，因偷学内门绝学为唐门所不容，跳崖明志时却发现没有死，反而以另外一个身份来到...  \n",
       "18  一粒尘可填海，一根草斩尽日月星辰，弹指间天翻地覆。群雄并起，万族林立，诸圣争霸，乱天动地。问...  \n",
       "1   【2017最火玄幻作品，海外点推双榜第一】张悬穿越异界，成了一名光荣的教师，脑海中多出了一个...  \n",
       "3   我这一生，不问前尘，不求来世，只轰轰烈烈，快意恩仇，败尽各族英杰，傲笑六道神魔！万年之后，大...  \n",
       "8   (已完本）x疯人院爆炸，院长云中鹤穿越，29个天才精神病人进入大脑，使他拥有29个诡异天赋！...  \n",
       "9   (已完本)穿越异世成为财主家的小白脸赘婿，因太废物被赶出来。于是他发奋图强，找一个更有权有势...  \n",
       "12  出门一把刀，装备全靠爆！网文写手白小文，被一颗神奇眼球带到了百年之后的灵能时代。灵子渗透，变...  \n",
       "17       “知识就等于力量。”“所谓神，不过是强大一点的奥术师。”带着一大堆知识的夏风穿越而来了。  \n",
       "18             修炼一途，乃窃阴阳，夺造化，转涅盘，握生死，掌轮回。武之极，破苍穹，动乾坤！  \n",
       "7   陆涯沉迷于修仙网游《上古仙庭》中，五年里夜夜爆肝修仙，就在他获得史上第一个【全职业满级称号】...  \n",
       "11  大千世界，位面交汇，万族林立，群雄荟萃，一位位来自下位面的天之至尊，在这无尽世界，演绎着令人...  \n",
       "17                                        与天斗，其乐无穷。……  \n",
       "5   在这里，武道不再是虚无缥缈的传说，而是切切实实的传承，经过与科技的对抗后，彻底融入了社会，有...  \n",
       "8   流星划破长空，同样划破了世界的平静。超凡的力量降临人间，是谁在黑暗中窥伺，又是谁在我耳边低语...  \n",
       "11  “消耗一年七个月寿命，可提升童子功。”“消耗一年七个月寿命，可提升金钟罩。”“消耗一年七个月...  \n",
       "13  自盘古开天，三皇治世，五帝定伦，世界之间，人为天地灵长……此时正值三皇中的人皇末期，五帝未定...  \n",
       "14                      手握烈日掌星辰，世间唯我背棺人。我为背棺人，我为自己背棺。  \n",
       "16  陆泽穿越到了两千年后的星际时代。前身自带常年秀恩爱虐狗的父母和可爱的妹妹，虽然修炼天赋普普通...  \n",
       "18  这是一个属于穿越者的聊天群，汇聚了来自诸天万界的穿越者。从小小的群成员做起，一步步成长到群主大佬！  \n",
       "0   异空间降临，神祇在面前显化，远古咆哮在耳边回荡不息。叮咚！一个人吃火锅，看电影，一人值加一。...  \n",
       "1   江左有个貌美的老婆，可是她却经常出差。去干嘛？不是忙着给他带帽子，而是瞒着他降妖除魔去了。得...  \n",
       "6   【无敌文】叮！您获得了天赋【神悟】，可领悟天地，从天地里获得力量。您发现自己每天什么都不用做...  \n",
       "11  来到异界，成为大佬门前一株杂草，没事就被踩一脚。系统激活，外挂傍身。要告诉这大佬，做人要有素...  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对爬取出来的内容进行分析\n",
    "df_all[df_all.大分类.str.contains(\"玄幻\")] # 筛选大分类下关键词“玄幻”"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大分类</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>玄幻</th>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>仙侠</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>历史</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>科幻</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>都市</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>轻小说</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>军事</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>悬疑</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>武侠</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>奇幻</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     标题\n",
       "大分类    \n",
       "玄幻   25\n",
       "仙侠   15\n",
       "历史   14\n",
       "科幻   13\n",
       "都市   13\n",
       "轻小说   6\n",
       "游戏    5\n",
       "军事    3\n",
       "悬疑    3\n",
       "武侠    2\n",
       "奇幻    1"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_stats = df_all.groupby(by=\"大分类\").agg({\"标题\":\"count\"}).sort_values(by=\"标题\", ascending=False)\n",
    "df_stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>作者</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>荣小荣</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>爱潜水的乌贼</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>宅猪</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>跃千愁</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>辰东</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>如水意</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>奈何笑忘川</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>天煌贵胄</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>天子</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>黑山老鬼</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>88 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        标题\n",
       "作者        \n",
       "荣小荣      2\n",
       "爱潜水的乌贼   2\n",
       "宅猪       2\n",
       "跃千愁      2\n",
       "辰东       2\n",
       "...     ..\n",
       "如水意      1\n",
       "奈何笑忘川    1\n",
       "天煌贵胄     1\n",
       "天子       1\n",
       "黑山老鬼     1\n",
       "\n",
       "[88 rows x 1 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_stats1 = df_all.groupby(by=\"作者\").agg({\"标题\":\"count\"}).sort_values(by=\"标题\", ascending=False)\n",
    "df_stats1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>大分类</th>\n",
       "      <th>小分类</th>\n",
       "      <th>简介</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>18</td>\n",
       "      <td>武动乾坤</td>\n",
       "      <td>天蚕土豆</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>修炼一途，乃窃阴阳，夺造化，转涅盘，握生死，掌轮回。武之极，破苍穹，动乾坤！</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>11</td>\n",
       "      <td>大主宰</td>\n",
       "      <td>天蚕土豆</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>大千世界，位面交汇，万族林立，群雄荟萃，一位位来自下位面的天之至尊，在这无尽世界，演绎着令人...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    index    标题    作者 大分类   小分类  \\\n",
       "38     18  武动乾坤  天蚕土豆  玄幻  东方玄幻   \n",
       "51     11   大主宰  天蚕土豆  玄幻  异世大陆   \n",
       "\n",
       "                                                   简介  \n",
       "38             修炼一途，乃窃阴阳，夺造化，转涅盘，握生死，掌轮回。武之极，破苍穹，动乾坤！  \n",
       "51  大千世界，位面交汇，万族林立，群雄荟萃，一位位来自下位面的天之至尊，在这无尽世界，演绎着令人...  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all[df_all.作者.str.contains(\"天蚕土豆\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>大分类</th>\n",
       "      <th>小分类</th>\n",
       "      <th>简介</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>诡秘之主</td>\n",
       "      <td>爱潜水的乌贼轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>蒸汽与机械的浪潮中，谁能触及非凡？历史和黑暗的迷雾里，又是谁在耳语？我从诡秘中醒来，睁眼看见...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>17</td>\n",
       "      <td>奥术神座</td>\n",
       "      <td>爱潜水的乌贼轻小说</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>异世大陆</td>\n",
       "      <td>“知识就等于力量。”“所谓神，不过是强大一点的奥术师。”带着一大堆知识的夏风穿越而来了。</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    index    标题         作者 大分类   小分类  \\\n",
       "0       0  诡秘之主  爱潜水的乌贼轻小说  玄幻  异世大陆   \n",
       "37     17  奥术神座  爱潜水的乌贼轻小说  玄幻  异世大陆   \n",
       "\n",
       "                                                   简介  \n",
       "0   蒸汽与机械的浪潮中，谁能触及非凡？历史和黑暗的迷雾里，又是谁在耳语？我从诡秘中醒来，睁眼看见...  \n",
       "37       “知识就等于力量。”“所谓神，不过是强大一点的奥术师。”带着一大堆知识的夏风穿越而来了。  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all[df_all.作者.str.contains(\"爱潜水的乌贼轻小说\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>大分类</th>\n",
       "      <th>小分类</th>\n",
       "      <th>简介</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>7</td>\n",
       "      <td>庆余年</td>\n",
       "      <td>猫腻</td>\n",
       "      <td>历史</td>\n",
       "      <td>架空历史</td>\n",
       "      <td>积善之家，必有余庆，留余庆，留余庆，忽遇恩人；幸娘亲，幸娘亲，积得阴功。劝人生，济困扶穷……...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>17</td>\n",
       "      <td>将夜</td>\n",
       "      <td>猫腻</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>与天斗，其乐无穷。……</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    index   标题  作者 大分类   小分类  \\\n",
       "27      7  庆余年  猫腻  历史  架空历史   \n",
       "57     17   将夜  猫腻  玄幻  东方玄幻   \n",
       "\n",
       "                                                   简介  \n",
       "27  积善之家，必有余庆，留余庆，留余庆，忽遇恩人；幸娘亲，幸娘亲，积得阴功。劝人生，济困扶穷……...  \n",
       "57                                        与天斗，其乐无穷。……  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all[df_all.作者.str.contains(\"猫腻\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>作者</th>\n",
       "      <th>大分类</th>\n",
       "      <th>小分类</th>\n",
       "      <th>简介</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>遮天</td>\n",
       "      <td>辰东</td>\n",
       "      <td>仙侠</td>\n",
       "      <td>修真文明</td>\n",
       "      <td>冰冷与黑暗并存的宇宙深处，九具庞大的龙尸拉着一口青铜古棺，亘古长存。这是太空探测器在枯寂的宇...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>完美世界</td>\n",
       "      <td>辰东</td>\n",
       "      <td>玄幻</td>\n",
       "      <td>东方玄幻</td>\n",
       "      <td>一粒尘可填海，一根草斩尽日月星辰，弹指间天翻地覆。群雄并起，万族林立，诸圣争霸，乱天动地。问...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    index    标题  作者 大分类   小分类  \\\n",
       "10     10    遮天  辰东  仙侠  修真文明   \n",
       "18     18  完美世界  辰东  玄幻  东方玄幻   \n",
       "\n",
       "                                                   简介  \n",
       "10  冰冷与黑暗并存的宇宙深处，九具庞大的龙尸拉着一口青铜古棺，亘古长存。这是太空探测器在枯寂的宇...  \n",
       "18  一粒尘可填海，一根草斩尽日月星辰，弹指间天翻地覆。群雄并起，万族林立，诸圣争霸，乱天动地。问...  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all[df_all.作者.str.contains(\"辰东\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
